More network analysis for the Boat Ramp site.
Important dates:
# Overall detection window of the merged Boat Ramp dataset (first/last record):
min(merged_br$DATETIME)
## [1] "2019-07-23 22:45:59 CDT"
max(merged_br$DATETIME)
## [1] "2019-10-29 00:06:05 CDT"
# Split each site's detections into the three treatment periods:
#   natural : before 2019-07-30 12:15 (corn deployed at that time)
#   corn    : 2019-07-30 12:15 up to (but excluding) 2019-08-16 15:00
#   netting : from 2019-08-16 15:00 onward
# Fix: the netting filters previously used ">" while corn used "< 15:00:00",
# so a detection logged at exactly 15:00:00 fell into neither period; ">="
# makes the three periods partition the data.
# NOTE(review): "CDT" is not an Olson timezone name; as.POSIXct() may fall
# back to UTC on some platforms -- consider tz = "America/Chicago". Confirm
# against the timezone of merged_br$DATETIME.
br_natural <- merged_br %>% filter(DATETIME < as.POSIXct("2019-07-30 12:15:00", tz="CDT"))
br_corn <- merged_br %>% filter(DATETIME >= as.POSIXct("2019-07-30 12:15:00", tz="CDT") & DATETIME < as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
br_netting <- merged_br %>% filter(DATETIME >= as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
# Subdivide the other sites by the same date boundaries.
br2_natural <- merged_br2 %>% filter(DATETIME < as.POSIXct("2019-07-30 12:15:00", tz="CDT"))
br2_corn <- merged_br2 %>% filter(DATETIME >= as.POSIXct("2019-07-30 12:15:00", tz="CDT") & DATETIME < as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
br2_netting <- merged_br2 %>% filter(DATETIME >= as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
crown_natural <- merged_crown %>% filter(DATETIME < as.POSIXct("2019-07-30 12:15:00", tz="CDT"))
crown_corn <- merged_crown %>% filter(DATETIME >= as.POSIXct("2019-07-30 12:15:00", tz="CDT") & DATETIME < as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
crown_netting <- merged_crown %>% filter(DATETIME >= as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
How many days out of the total number of sampling days did fish visit any of the antennas at Boat Ramp?
Define a function to calculate the number of days each fish visited a site. @param df — data frame with a $PIT ID column and a $DATETIME column. @param num_days — number of days over which to loop. @param start_date — day at which to start the loop. @return the superfish data frame (one row per fish with its visit count).
#' Count, for each tagged fish, the number of days on which it was detected.
#'
#' Replaces the original per-fish/per-day double loop, which (a) re-filtered
#' the whole data frame on every iteration, (b) recomputed unique(df$PIT)
#' repeatedly, (c) contained a dead `fishID %in% sub_combined$PIT` statement,
#' and (d) used both ">= start" and "<= end" per day, so a detection exactly
#' on a midnight boundary was counted in two consecutive days. Each detection
#' is now mapped once to a 1-based day index with half-open day windows
#' [start + (j-1)d, start + jd).
#'
#' @param df data frame with $PIT (tag ID) and $DATETIME (POSIXct) columns
#' @param num_days number of 24-hour days to consider, starting at start_date
#' @param start_date POSIXct beginning of day 1
#' @return data frame with one row per unique PIT: fishID, nvisit = number of
#'   distinct days (out of num_days) with at least one detection
calc_nvisit <- function(df, num_days, start_date) {
  pits <- unique(df$PIT)
  # Seconds since start_date; fixed 86400-second days, matching the original.
  secs <- as.numeric(difftime(df$DATETIME, start_date, units = "secs"))
  # 1-based day index: detections before start_date get index <= 0 and
  # detections past the window get index > num_days; both are dropped.
  day_idx <- floor(secs / 86400) + 1
  in_window <- day_idx >= 1 & day_idx <= num_days
  superfish <- data.frame(fishID = pits, nvisit = NA_integer_)
  for (i in seq_along(pits)) {
    # Number of distinct days on which fish i appears inside the window.
    superfish$nvisit[i] <- length(unique(day_idx[in_window & df$PIT == pits[i]]))
  }
  return(superfish)
}
# Natural-food period at Boat Ramp: visit counts per fish.
dates <- unique(br_natural$DATETIME) # observation timestamps in this period
(min_date <- min(dates))
## [1] "2019-07-23 22:45:59 CDT"
(max_date <- max(dates))
## [1] "2019-07-25 21:55:15 CDT"
# Trailing "CDT" in the original strings was ignored by strptime; both parse
# in the session's local timezone either way.
start_date <- as.POSIXct("2019-07-23 12:00:00") # rounded to nearest midday
end_date <- as.POSIXct("2019-07-26 12:00:00")
# Fix: request day units explicitly -- as.numeric(end_date - start_date)
# inherits whatever unit difftime auto-selects (e.g. hours for spans < 1 day).
num_days <- as.numeric(difftime(end_date, start_date, units = "days"))
superfish_br <- calc_nvisit(df = br_natural, num_days = num_days, start_date = start_date)
hist(superfish_br$nvisit, xlab="Number of sampling days", main="")
# Corn-baiting period: visit counts per fish at Boat Ramp and Crown.
dates <- unique(br_corn$DATETIME) # observation timestamps in this period
(min_date <- min(dates))
## [1] "2019-08-01 12:10:07 CDT"
(max_date <- max(dates))
## [1] "2019-08-16 13:07:26 CDT"
start_date <- as.POSIXct("2019-08-01 12:00:00") # rounded to nearest midday; parsed in local tz
end_date <- as.POSIXct("2019-08-16 12:00:00")
# Fix: explicit day units instead of relying on difftime's automatic choice.
num_days <- as.numeric(difftime(end_date, start_date, units = "days"))
superfish_br <- calc_nvisit(df = br_corn, num_days = num_days, start_date = start_date)
# superfish_br2 <- calc_nvisit(df = br2_corn, num_days = num_days, start_date = start_date) # BR2 sampling period does not overlap
superfish_crown <- calc_nvisit(df = crown_corn, num_days = num_days, start_date = start_date)
hist(superfish_br$nvisit, xlab="Number of sampling days", main="BR Corn")
hist(superfish_crown$nvisit, xlab="Number of sampling days", main="Crown Corn")
# (superseded by the factor-level ranking below)
# top20<-round(nrow(superfish)*.2)
# superfish<-superfish[order(superfish$nvisit, decreasing= TRUE),]
# top20<-superfish[1:top20,]
# Full outer join of per-site counts; fish unseen at one site get NA -> 0.
combo<-merge(superfish_br, superfish_crown, by="fishID", all.x=TRUE, all.y=TRUE)
combo[is.na(combo)] <- 0
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
# Long format: one row per fish x site; `variable` is nvisit.x (Boat Ramp)
# or nvisit.y (Crown) from the merge suffixes, `value` is the day count.
test<-melt(combo, id.vars="fishID")
# Row order affects stacking order in the downstream geom_col().
test<-test[order(test$value, decreasing=TRUE),]
# Rank fish by combined visits across both sites; the factor levels below
# carry that ordering into the plot.
combo$total<-combo$nvisit.x+ combo$nvisit.y
combo<-combo[order(combo$total, decreasing=TRUE),]
test$fishID <- factor(test$fishID, levels = combo$fishID[order(combo$total, decreasing=TRUE)])
# top20 = index of the last fish inside the top 20% by total visits;
# cutoff = that fish's ID.
top20<-round(length(levels(test$fishID))*.2)
cutoff<-levels(test$fishID)[top20]
library(ggplot2)
# Stacked visits per fish, ordered by total visits across both sites (the
# ordering is already baked into the factor levels of test$fishID).
# Fixes: (1) reorder(fishID, -value) re-sorted the axis by per-site value,
# contradicting the total-based ordering used to compute `cutoff`;
# (2) geom_vline was given which() row indices of the melted frame (two rows
# per fish), which are not positions on the discrete x axis.
ggplot(test, aes(fill=variable, y=value, x=fishID))+
geom_col()+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
scale_fill_discrete(name = "Site", labels = c("Boat Ramp", "Crown"))+
labs(y= "Visits", x = "Carp ID")+
geom_vline(xintercept = top20 + 0.5) # boundary just after the top-20% fish
# Netting period: visit counts per fish at all three sites.
dates <- unique(br_netting$DATETIME) # observation timestamps in this period
(min_date <- min(dates))
## [1] "2019-08-16 17:00:20 CDT"
(max_date <- max(dates))
## [1] "2019-10-29 00:06:05 CDT"
start_date <- as.POSIXct("2019-08-16 12:00:00") # rounded to nearest midday; parsed in local tz
end_date <- as.POSIXct("2019-10-29 12:00:00")
# Fix: explicit day units instead of relying on difftime's automatic choice.
num_days <- as.numeric(difftime(end_date, start_date, units = "days"))
superfish_br <- calc_nvisit(df = br_netting, num_days = num_days, start_date = start_date)
# NOTE(review): the original comment here said "sampling period does not
# overlap" (copied from the corn chunk where the call was commented out);
# BR2 is included in this period, so presumably it does overlap -- confirm.
superfish_br2 <- calc_nvisit(df = br2_netting, num_days = num_days, start_date = start_date)
superfish_crown <- calc_nvisit(df = crown_netting, num_days = num_days, start_date = start_date)
hist(superfish_br$nvisit, xlab="Number of sampling days", main="BR Netting")
hist(superfish_br2$nvisit, xlab="Number of sampling days", main="BR2 Netting")
hist(superfish_crown$nvisit, xlab="Number of sampling days", main="Crown Netting")
# (superseded by the factor-level ranking below)
# top20<-round(nrow(superfish)*.2)
# superfish<-superfish[order(superfish$nvisit, decreasing= TRUE),]
# top20<-superfish[1:top20,]
# Chain two outer joins: columns become nvisit.x (BR), nvisit.y (BR2) and
# nvisit (Crown, unsuffixed because it no longer collides). NA -> 0 for fish
# unseen at a site.
combo<-merge(superfish_br, superfish_br2, by="fishID", all.x=TRUE, all.y=TRUE)
combo<-merge(combo, superfish_crown, by="fishID", all.x=TRUE, all.y=TRUE)
combo[is.na(combo)] <- 0
library(reshape2)
# Long format: one row per fish x site; row order affects geom_col stacking.
test<-melt(combo, id.vars="fishID")
test<-test[order(test$value, decreasing=TRUE),]
# Rank fish by total visits across the three sites; carry the ordering into
# the plot via factor levels.
combo$total<-combo$nvisit.x+ combo$nvisit.y + combo$nvisit
combo<-combo[order(combo$total, decreasing=TRUE),]
test$fishID <- factor(test$fishID, levels = combo$fishID[order(combo$total, decreasing=TRUE)])
# top20 = index of the last fish inside the top 20% by total visits.
top20<-round(length(levels(test$fishID))*.2)
cutoff<-levels(test$fishID)[top20]
library(ggplot2)
# Stacked visits per fish across the three sites, ordered by total visits
# (ordering is carried by the factor levels of test$fishID; the previous
# as.factor() wrapper was a no-op on an existing factor).
# Fix: geom_vline was given which() row indices of the melted frame (three
# rows per fish), which are not positions on the discrete x axis.
ggplot(test, aes(fill=variable, y=value, x=fishID))+
geom_col()+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
scale_fill_discrete(name = "Site", labels = c("Boat Ramp", "Boat Ramp II", "Crown"))+
labs(y= "Visits", x = "Carp ID")+
geom_vline(xintercept = top20 + 0.5) # boundary just after the top-20% fish
Treating all antennas at a site as a single, equivalent location:
# Hourly network analysis over the corn period.
dates <- unique(br_corn$DATETIME) # observation timestamps in this period
(min_date <- min(dates))
## [1] "2019-08-01 12:10:07 CDT"
(max_date <- max(dates))
## [1] "2019-08-16 13:07:26 CDT"
start_date <- as.POSIXct("2019-08-01 12:00:00") # rounded to nearest midday; parsed in local tz
end_date <- as.POSIXct("2019-08-16 12:00:00")
# Fix: ask difftime for hours directly; as.numeric(end_date - start_date)*24
# silently assumed difftime had chosen day units.
num_hours <- as.numeric(difftime(end_date, start_date, units = "hours"))
# One row of network metrics per hour.
# NOTE(review): the "day" column actually holds the hour index; name kept for
# compatibility with the printed output below.
network_df <- data.frame(day=seq_len(num_hours), nnodes=rep(NA,num_hours), mean_degree=rep(NA, num_hours), transitivity=rep(NA, num_hours), edge_density=rep(NA, num_hours), diameter=rep(NA, num_hours), centralization=rep(NA, num_hours), eigen= rep(NA, num_hours), btwn= rep(NA, num_hours),modularity=rep(NA, num_hours))
degree_list <- NULL # NOTE(review): assigned but never used in the visible chunks
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
##
## crossing
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
# Build one bipartite fish-x-reader network per hour (all antennas collapsed
# into the Site label) and record graph metrics for the fish projection.
for(i in seq_len(num_hours)){
  # Half-open hourly window [start + (i-1)h, start + ih). Fix: the original
  # used "<=" on the upper bound, so a detection landing exactly on an hour
  # boundary was counted in two consecutive windows.
  sub_combined <- merged_br %>%
    filter(DATETIME >= start_date + (i-1)*3600 & DATETIME < start_date + i*3600) # 3600 seconds/hour
  # Fish <-> reader edge list; Reader is the Site, so antennas are merged.
  edgelist <- data.frame(PIT=sub_combined$PIT, Reader=sub_combined$Site)
  network_df$nnodes[i] <- length(unique(sub_combined$PIT))
  if(nrow(edgelist) > 0){
    # Sparse fish-x-reader incidence matrix; repeated (PIT, Reader) pairs
    # accumulate into detection counts.
    A <- spMatrix(nrow=length(unique(edgelist$PIT)),
                  ncol=length(unique(edgelist$Reader)),
                  i = as.numeric(factor(edgelist$PIT)),
                  j = as.numeric(factor(edgelist$Reader)),
                  x = rep(1, nrow(edgelist)))
    row.names(A) <- levels(factor(edgelist$PIT))
    colnames(A) <- levels(factor(edgelist$Reader))
    # NOTE(review): graph.incidence()/bipartite.projection() are the legacy
    # igraph names (now graph_from_biadjacency_matrix()/bipartite_projection());
    # kept for compatibility with the installed igraph version.
    bi <- graph.incidence(A, mode="all") # undirected, named graph that is bipartite
    pr <- bipartite.projection(bi)
    # $proj1 = fish co-occurrence network; $proj2 = readers sharing fish.
    plot(pr$proj1, main=paste("Hour =", i))
    # Fix: mean(degree_distribution(g)) is the mean of the relative-frequency
    # vector (always 1/(max degree + 1) -- visible in the printed output),
    # not the mean degree; use mean(degree(g)).
    network_df$mean_degree[i] <- mean(degree(pr$proj1))
    network_df$transitivity[i] <- transitivity(pr$proj1, "global")
    network_df$edge_density[i] <- edge_density(pr$proj1, loops=FALSE)
    network_df$diameter[i] <- diameter(pr$proj1, directed=FALSE, weights=NA)
    network_df$centralization[i] <- centr_degree(pr$proj1, loops=FALSE, normalized=TRUE)$centralization
    network_df$eigen[i] <- centr_eigen(pr$proj1, directed=FALSE, normalized=TRUE)$centralization
    network_df$btwn[i] <- centr_betw(pr$proj1, directed=FALSE, normalized=TRUE)$centralization
    network_df$modularity[i] <- modularity(pr$proj1, membership(cluster_walktrap(pr$proj1)))
  }
}
# First six hourly rows of the single-site run: with only one reader, every
# hourly projection is a clique, hence the constant transitivity,
# edge_density and diameter of 1 seen below.
head(network_df)
## day nnodes mean_degree transitivity edge_density diameter centralization
## 1 1 6 0.16666667 1 1 1 0
## 2 2 7 0.14285714 1 1 1 0
## 3 3 11 0.09090909 1 1 1 0
## 4 4 9 0.11111111 1 1 1 0
## 5 5 10 0.10000000 1 1 1 0
## 6 6 12 0.08333333 1 1 1 0
## eigen btwn modularity
## 1 2.220446e-16 0 -0.16666667
## 2 0.000000e+00 0 -0.14285714
## 3 1.973730e-16 0 -0.09090909
## 4 2.537653e-16 0 -0.11111111
## 5 4.440892e-16 0 -0.10000000
## 6 0.000000e+00 0 -0.08333333
library(tidyr)
# Repeat the hourly networks, now distinguishing individual antennas
# (Reader = Antenna instead of Site).
# NOTE(review): this loop overwrites the network_df rows computed by the
# site-level run above -- save a copy first if both results are needed.
for(i in seq_len(num_hours)){
  # Half-open hourly window [start + (i-1)h, start + ih). Fix: the original
  # "<=" upper bound double-counted detections on exact hour boundaries.
  sub_combined <- merged_br %>%
    filter(DATETIME >= start_date + (i-1)*3600 & DATETIME < start_date + i*3600) # 3600 seconds/hour
  # Fish <-> antenna edge list.
  edgelist <- data.frame(PIT=sub_combined$PIT, Reader=sub_combined$Antenna)
  network_df$nnodes[i] <- length(unique(sub_combined$PIT))
  if(nrow(edgelist) > 0){
    # Sparse fish-x-antenna incidence matrix; repeated (PIT, Reader) pairs
    # accumulate into detection counts.
    A <- spMatrix(nrow=length(unique(edgelist$PIT)),
                  ncol=length(unique(edgelist$Reader)),
                  i = as.numeric(factor(edgelist$PIT)),
                  j = as.numeric(factor(edgelist$Reader)),
                  x = rep(1, nrow(edgelist)))
    row.names(A) <- levels(factor(edgelist$PIT))
    colnames(A) <- levels(factor(edgelist$Reader))
    bi <- graph.incidence(A, mode="all") # undirected, named graph that is bipartite
    pr <- bipartite.projection(bi)
    # $proj1 = fish co-occurrence network; $proj2 = antennas sharing fish.
    plot(pr$proj1, main=paste("Hour =", i))
    # Fix: mean(degree_distribution(g)) is the mean of the relative-frequency
    # vector, not the mean degree; use mean(degree(g)).
    network_df$mean_degree[i] <- mean(degree(pr$proj1))
    network_df$transitivity[i] <- transitivity(pr$proj1, "global")
    network_df$edge_density[i] <- edge_density(pr$proj1, loops=FALSE)
    network_df$diameter[i] <- diameter(pr$proj1, directed=FALSE, weights=NA)
    network_df$centralization[i] <- centr_degree(pr$proj1, loops=FALSE, normalized=TRUE)$centralization
    network_df$eigen[i] <- centr_eigen(pr$proj1, directed=FALSE, normalized=TRUE)$centralization
    network_df$btwn[i] <- centr_betw(pr$proj1, directed=FALSE, normalized=TRUE)$centralization
    network_df$modularity[i] <- modularity(pr$proj1, membership(cluster_walktrap(pr$proj1)))
  }
}
# First six hourly rows of the per-antenna run: with multiple antennas the
# projections are no longer complete graphs, so transitivity/edge_density
# drop below 1 and diameter/centralization become informative.
head(network_df)
## day nnodes mean_degree transitivity edge_density diameter centralization
## 1 1 6 0.20000000 0.6000000 0.4000000 2 0.60000000
## 2 2 7 0.14285714 0.9473684 0.9523810 2 0.06666667
## 3 3 11 0.09090909 0.8211679 0.7272727 2 0.33333333
## 4 4 9 0.11111111 0.9344262 0.8333333 2 0.21428571
## 5 5 10 0.10000000 0.7777778 0.6666667 2 0.41666667
## 6 6 12 0.09090909 0.7471264 0.6212121 2 0.34545455
## eigen btwn modularity
## 1 0.60961180 0.400000000 0.1111111
## 2 0.05166852 0.004444444 -0.1050000
## 3 0.24724903 0.105185185 0.0165625
## 4 0.13248532 0.093750000 0.0200000
## 5 0.33769901 0.154320988 0.1666667
## 6 0.30933328 0.157024793 0.1439619